package crawl;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Downloads the images referenced by a web page, locating them with CSS selectors.
 *
 * <p>Each {@code li.mb} item on the page is expected to contain an {@code a img} element;
 * the image is saved under a {@code resources} directory using the image's {@code alt}
 * text as the file name.
 */
public class ImageCrawl {
	/** Page whose images are downloaded. */
	private static final String PAGE_URL = "http://yhdm80.com/";

	/** Browser-like User-Agent sent with requests (copied from a desktop Chrome build). */
	private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36";

	/** Characters that must not appear in a file name (path separators and reserved characters). */
	private static final String ILLEGAL_FILE_NAME_CHARS = "\\/:*?\"<>|";

	/**
	 * Fetches {@link #PAGE_URL} and saves every image found in its {@code li.mb} items.
	 *
	 * <p>A failure to download a single image is reported on standard error and the crawl
	 * continues with the next item.
	 *
	 * @param args unused
	 * @throws IOException if the page itself cannot be fetched
	 */
	public static void main(String[] args) throws IOException {
		// Jsoup fetches and parses the page in one step; apacheHttpClient() is an
		// alternative way to retrieve the same page content as raw HTML.
		Document document = Jsoup.connect(PAGE_URL).get();
		File outputDir = resolveOutputDir();

		int index = 0;
		for (Element item : document.select("li.mb")) {
			index++;
			Elements images = item.select("a img");

			// The "abs:" prefix resolves relative URLs against the page's base URI.
			// The code reads "data-original" first and only falls back to "src" when it is absent.
			String imageUrl = images.attr("abs:data-original");
			if (imageUrl.isEmpty()) {
				imageUrl = images.attr("abs:src");
			}
			if (imageUrl.isEmpty()) {
				continue; // no usable image in this item
			}

			File target = new File(outputDir, toFileName(images.attr("alt"), index) + ".png");
			try {
				download(imageUrl, target);
			} catch (IOException | IllegalArgumentException e) {
				// One broken image must not abort the whole crawl.
				System.err.println("Skipping " + imageUrl + ": " + e);
			}
		}
	}

	/**
	 * Downloads {@code imageUrl} and writes the response body to {@code target},
	 * creating parent directories as needed.
	 *
	 * <p>Note: the whole response body is buffered in memory before it is written, so
	 * running many downloads concurrently can exhaust the heap.
	 *
	 * @param imageUrl absolute http(s) URL of the image
	 * @param target   file to write
	 * @throws IOException if the request or the write fails
	 */
	private static void download(String imageUrl, File target) throws IOException {
		Connection.Response response = Jsoup.connect(imageUrl)
				.userAgent(USER_AGENT)
				.ignoreContentType(true) // the response is an image, not HTML
				.maxBodySize(0) // 0 = unlimited; the default cap would truncate large images
				.execute();
		FileUtils.writeByteArrayToFile(target, response.bodyAsBytes());
	}

	/**
	 * Returns the directory images are saved to: {@code resources} under the classpath
	 * root, or under the working directory when the classpath root is not a plain
	 * directory (for example when running from a jar).
	 */
	private static File resolveOutputDir() {
		File base = new File(System.getProperty("user.dir"));
		URL root = ImageCrawl.class.getResource("/");
		if (root != null && "file".equals(root.getProtocol())) {
			try {
				// toURI() decodes percent-escapes such as %20 that getPath() leaves in place.
				base = new File(root.toURI());
			} catch (URISyntaxException | IllegalArgumentException ignored) {
				// The URL is not a well-formed URI; use its raw path instead.
				base = new File(root.getPath());
			}
		}
		return new File(base, "resources");
	}

	/**
	 * Turns an image's {@code alt} text into a safe file name (without extension).
	 *
	 * @param alt   alt text taken from the page; may be empty
	 * @param index 1-based position of the item, used when {@code alt} is empty
	 * @return {@code alt} with reserved and control characters replaced by {@code '_'},
	 *         or {@code "image-" + index} when {@code alt} is blank
	 */
	private static String toFileName(String alt, int index) {
		String trimmed = alt == null ? "" : alt.trim();
		if (trimmed.isEmpty()) {
			return "image-" + index;
		}
		StringBuilder safe = new StringBuilder(trimmed.length());
		for (int i = 0; i < trimmed.length(); i++) {
			char c = trimmed.charAt(i);
			boolean illegal = c < 0x20 || ILLEGAL_FILE_NAME_CHARS.indexOf(c) >= 0;
			safe.append(illegal ? '_' : c);
		}
		return safe.toString();
	}

	/**
	 * Fetches {@link #PAGE_URL} with Apache HttpClient and prints the HTML to standard output.
	 *
	 * <p>Currently not called; kept as an alternative to the Jsoup fetch in {@code main}.
	 *
	 * @throws RuntimeException wrapping the {@link IOException} if the request fails
	 */
	private static void apacheHttpClient() {
		HttpGet httpGet = new HttpGet(PAGE_URL);
		httpGet.setHeader("User-Agent", USER_AGENT);

		// Both the client and the response hold connections and must be closed.
		try (CloseableHttpClient client = HttpClients.createDefault();
				CloseableHttpResponse response = client.execute(httpGet)) {
			HttpEntity entity = response.getEntity();
			if (entity != null) {
				// UTF-8 is used only when the response does not declare a charset.
				System.out.println(EntityUtils.toString(entity, StandardCharsets.UTF_8));
			}
		} catch (IOException e) {
			throw new RuntimeException("Failed to fetch " + PAGE_URL, e);
		}
	}

}
